import pandas as pd
from utils import read_sql, df_into_db


def get_exist_data():
    """Fetch the 10 rows of ``total_marketcap_and_amount`` with the latest ``end_date``.

    The query runs through ``read_sql`` against the ``dataloader`` database
    and orders rows newest first. The result is returned unchanged; the
    script in this file treats it as a pandas DataFrame.
    """
    query = "select * from total_marketcap_and_amount order by end_date desc limit 10"
    return read_sql(query, db_name="dataloader")


# Before every update, re-check the 10 most recent rows already stored in the
# database against the freshly exported CSV, then append only the newer rows.
path = "G:/data/amount_data/total_marketcap_and_amount.csv"
df = pd.read_csv(path)

# Rows that contain at least one missing value.
null_df = df[df.isna().any(axis=1)]
# Inspection aid, not used below: every row whose end_date also appears on an
# incomplete row. Per the original note, dates with missing values turned out
# to appear twice in the CSV.
check_by_date = df[df["end_date"].isin(set(null_df["end_date"]))]

# Drop incomplete rows first, then keep the first remaining row per end_date.
df = df.dropna()
df = df.drop_duplicates(subset=["end_date"], keep="first")
df["end_date"] = pd.to_datetime(df["end_date"]).dt.date

exist_data = get_exist_data()
exist_data = exist_data.sort_values(by=["end_date"], ascending=True)

if not exist_data.empty:
    oldest_stored = exist_data.iloc[0]["end_date"]
    newest_stored = exist_data.iloc[-1]["end_date"]

    # CSV rows from the oldest sampled database row onwards. The comparison
    # below is positional, so the CSV is expected to be in ascending
    # end_date order, matching the sorted database sample.
    overlap = df[df["end_date"] >= oldest_stored]
    if len(overlap) < len(exist_data):
        raise ValueError(
            f"CSV has only {len(overlap)} rows on or after {oldest_stored}, "
            f"but the database holds {len(exist_data)}; refusing to update"
        )

    columns = ["market_cap", "amount", "amount_reported", "btcDominance"]
    # Explicit raises instead of `assert`: assertions are removed when Python
    # runs with -O, which would silently skip this consistency check.
    for (_, csv_row), (_, db_row) in zip(overlap.iterrows(), exist_data.iterrows()):
        # Normalise both sides to Timestamp so the date comparison does not
        # depend on which date type the database driver returns.
        if pd.Timestamp(csv_row["end_date"]) != pd.Timestamp(db_row["end_date"]):
            raise ValueError(
                f"date mismatch: CSV has {csv_row['end_date']} "
                f"where the database has {db_row['end_date']}"
            )
        for column in columns:
            # Exact equality on purpose, same as the original check.
            if not (csv_row[column] == db_row[column]):
                raise ValueError(
                    f"{column} mismatch on {db_row['end_date']}: "
                    f"CSV={csv_row[column]!r}, database={db_row[column]!r}"
                )

    # Only rows strictly newer than the latest stored row are inserted.
    df = df[df["end_date"] > newest_stored]
# When the table is empty there is nothing to verify against, so every clean
# CSV row is treated as new.

if df.empty:
    print("没有数据需要更新")
else:
    df_into_db(df, db_name="dataloader", table_name="total_marketcap_and_amount")